# Load the raw character tables for both publishers from the data directory
dc <- read.csv(file = "../data/dc.csv")
marvel <- read.csv(file = "../data/marvel.csv")
# Preview the first rows of the Marvel table
head(x = marvel)
##   page_id                                  name
## 1    1678             Spider-Man (Peter Parker)
## 2    7139       Captain America (Steven Rogers)
## 3   64786 Wolverine (James \\"Logan\\" Howlett)
## 4    1868   Iron Man (Anthony \\"Tony\\" Stark)
## 5    2460                   Thor (Thor Odinson)
## 6    2458            Benjamin Grimm (Earth-616)
##                                    urlslug               ID              ALIGN
## 1             \\/Spider-Man_(Peter_Parker)  Secret Identity    Good Characters
## 2       \\/Captain_America_(Steven_Rogers)  Public Identity    Good Characters
## 3 \\/Wolverine_(James_%22Logan%22_Howlett)  Public Identity Neutral Characters
## 4   \\/Iron_Man_(Anthony_%22Tony%22_Stark)  Public Identity    Good Characters
## 5                   \\/Thor_(Thor_Odinson) No Dual Identity    Good Characters
## 6            \\/Benjamin_Grimm_(Earth-616)  Public Identity    Good Characters
##          EYE       HAIR             SEX GSM             ALIVE APPEARANCES
## 1 Hazel Eyes Brown Hair Male Characters     Living Characters        4043
## 2  Blue Eyes White Hair Male Characters     Living Characters        3360
## 3  Blue Eyes Black Hair Male Characters     Living Characters        3061
## 4  Blue Eyes Black Hair Male Characters     Living Characters        2961
## 5  Blue Eyes Blond Hair Male Characters     Living Characters        2258
## 6  Blue Eyes    No Hair Male Characters     Living Characters        2255
##   FIRST.APPEARANCE Year
## 1           Aug-62 1962
## 2           Mar-41 1941
## 3           Oct-74 1974
## 4           Mar-63 1963
## 5           Nov-50 1950
## 6           Nov-61 1961
head(dc)
##   page_id                        name                               urlslug
## 1    1422        Batman (Bruce Wayne)        \\/wiki\\/Batman_(Bruce_Wayne)
## 2   23387       Superman (Clark Kent)       \\/wiki\\/Superman_(Clark_Kent)
## 3    1458  Green Lantern (Hal Jordan)  \\/wiki\\/Green_Lantern_(Hal_Jordan)
## 4    1659    James Gordon (New Earth)    \\/wiki\\/James_Gordon_(New_Earth)
## 5    1576 Richard Grayson (New Earth) \\/wiki\\/Richard_Grayson_(New_Earth)
## 6    1448 Wonder Woman (Diana Prince) \\/wiki\\/Wonder_Woman_(Diana_Prince)
##                ID           ALIGN        EYE       HAIR               SEX GSM
## 1 Secret Identity Good Characters  Blue Eyes Black Hair   Male Characters    
## 2 Secret Identity Good Characters  Blue Eyes Black Hair   Male Characters    
## 3 Secret Identity Good Characters Brown Eyes Brown Hair   Male Characters    
## 4 Public Identity Good Characters Brown Eyes White Hair   Male Characters    
## 5 Secret Identity Good Characters  Blue Eyes Black Hair   Male Characters    
## 6 Public Identity Good Characters  Blue Eyes Black Hair Female Characters    
##               ALIVE APPEARANCES FIRST.APPEARANCE YEAR
## 1 Living Characters        3093        1939, May 1939
## 2 Living Characters        2496    1986, October 1986
## 3 Living Characters        1565    1959, October 1959
## 4 Living Characters        1316   1987, February 1987
## 5 Living Characters        1237      1940, April 1940
## 6 Living Characters        1231   1941, December 1941
print(dim(marvel))
## [1] 16376    13
print(dim(dc))
## [1] 6896   13
summary(marvel)
##     page_id           name             urlslug               ID           
##  Min.   :  1025   Length:16376       Length:16376       Length:16376      
##  1st Qu.: 28310   Class :character   Class :character   Class :character  
##  Median :282578   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :300232                                                           
##  3rd Qu.:509077                                                           
##  Max.   :755278                                                           
##                                                                           
##     ALIGN               EYE                HAIR               SEX           
##  Length:16376       Length:16376       Length:16376       Length:16376      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      GSM               ALIVE            APPEARANCES      FIRST.APPEARANCE  
##  Length:16376       Length:16376       Min.   :   1.00   Length:16376      
##  Class :character   Class :character   1st Qu.:   1.00   Class :character  
##  Mode  :character   Mode  :character   Median :   3.00   Mode  :character  
##                                        Mean   :  17.03                     
##                                        3rd Qu.:   8.00                     
##                                        Max.   :4043.00                     
##                                        NA's   :1096                        
##       Year     
##  Min.   :1939  
##  1st Qu.:1974  
##  Median :1990  
##  Mean   :1985  
##  3rd Qu.:2000  
##  Max.   :2013  
##  NA's   :815
summary(dc)
##     page_id           name             urlslug               ID           
##  Min.   :  1380   Length:6896        Length:6896        Length:6896       
##  1st Qu.: 44106   Class :character   Class :character   Class :character  
##  Median :141267   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :147441                                                           
##  3rd Qu.:213203                                                           
##  Max.   :404010                                                           
##                                                                           
##     ALIGN               EYE                HAIR               SEX           
##  Length:6896        Length:6896        Length:6896        Length:6896       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      GSM               ALIVE            APPEARANCES      FIRST.APPEARANCE  
##  Length:6896        Length:6896        Min.   :   1.00   Length:6896       
##  Class :character   Class :character   1st Qu.:   2.00   Class :character  
##  Mode  :character   Mode  :character   Median :   6.00   Mode  :character  
##                                        Mean   :  23.63                     
##                                        3rd Qu.:  15.00                     
##                                        Max.   :3093.00                     
##                                        NA's   :355                         
##       YEAR     
##  Min.   :1935  
##  1st Qu.:1983  
##  Median :1992  
##  Mean   :1990  
##  3rd Qu.:2003  
##  Max.   :2013  
##  NA's   :69
# Cleaning the datasets
colSums(is.na(marvel))
##          page_id             name          urlslug               ID 
##                0                0                0                0 
##            ALIGN              EYE             HAIR              SEX 
##                0                0                0                0 
##              GSM            ALIVE      APPEARANCES FIRST.APPEARANCE 
##                0                0             1096                0 
##             Year 
##              815
colSums(is.na(dc))
##          page_id             name          urlslug               ID 
##                0                0                0                0 
##            ALIGN              EYE             HAIR              SEX 
##                0                0                0                0 
##              GSM            ALIVE      APPEARANCES FIRST.APPEARANCE 
##                0                0              355                0 
##             YEAR 
##               69
# Columns removed from the Marvel table (its year column is spelled "Year")
cols <- c("page_id", "urlslug", "GSM", "Year")
# all_of() tells tidyselect that `cols` is an external character vector of
# column names; passing the bare vector is deprecated since tidyselect 1.1.0
marvel <- marvel %>% select(-all_of(cols))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols)
## 
##   # Now:
##   data %>% select(all_of(cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Columns removed from the DC table (its year column is spelled "YEAR")
cols <- c("page_id", "urlslug", "GSM", "YEAR")
# all_of() tells tidyselect that `cols` is an external character vector of
# column names; passing the bare vector is deprecated since tidyselect 1.1.0
dc <- dc %>% select(-all_of(cols))

head(marvel)
##                                    name               ID              ALIGN
## 1             Spider-Man (Peter Parker)  Secret Identity    Good Characters
## 2       Captain America (Steven Rogers)  Public Identity    Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett)  Public Identity Neutral Characters
## 4   Iron Man (Anthony \\"Tony\\" Stark)  Public Identity    Good Characters
## 5                   Thor (Thor Odinson) No Dual Identity    Good Characters
## 6            Benjamin Grimm (Earth-616)  Public Identity    Good Characters
##          EYE       HAIR             SEX             ALIVE APPEARANCES
## 1 Hazel Eyes Brown Hair Male Characters Living Characters        4043
## 2  Blue Eyes White Hair Male Characters Living Characters        3360
## 3  Blue Eyes Black Hair Male Characters Living Characters        3061
## 4  Blue Eyes Black Hair Male Characters Living Characters        2961
## 5  Blue Eyes Blond Hair Male Characters Living Characters        2258
## 6  Blue Eyes    No Hair Male Characters Living Characters        2255
##   FIRST.APPEARANCE
## 1           Aug-62
## 2           Mar-41
## 3           Oct-74
## 4           Mar-63
## 5           Nov-50
## 6           Nov-61
head(dc)
##                          name              ID           ALIGN        EYE
## 1        Batman (Bruce Wayne) Secret Identity Good Characters  Blue Eyes
## 2       Superman (Clark Kent) Secret Identity Good Characters  Blue Eyes
## 3  Green Lantern (Hal Jordan) Secret Identity Good Characters Brown Eyes
## 4    James Gordon (New Earth) Public Identity Good Characters Brown Eyes
## 5 Richard Grayson (New Earth) Secret Identity Good Characters  Blue Eyes
## 6 Wonder Woman (Diana Prince) Public Identity Good Characters  Blue Eyes
##         HAIR               SEX             ALIVE APPEARANCES FIRST.APPEARANCE
## 1 Black Hair   Male Characters Living Characters        3093        1939, May
## 2 Black Hair   Male Characters Living Characters        2496    1986, October
## 3 Brown Hair   Male Characters Living Characters        1565    1959, October
## 4 White Hair   Male Characters Living Characters        1316   1987, February
## 5 Black Hair   Male Characters Living Characters        1237      1940, April
## 6 Black Hair Female Characters Living Characters        1231   1941, December
# Remove rows whose FIRST.APPEARANCE is NA (drop_na() only removes true NA
# values, not blank strings)
marvel <- drop_na(marvel, FIRST.APPEARANCE)
dc <- drop_na(dc, FIRST.APPEARANCE)

# Marvel writes the first appearance as "Mon-YY": split it into MONTH and YEAR
marvel <- separate(marvel, col = FIRST.APPEARANCE, into = c("MONTH", "YEAR"), sep = "-")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 815 rows [13, 39,
## 81, 115, 260, 311, 414, 684, 790, 855, 998, 1119, 1159, 1317, 1455, 1565, 1566,
## 1846, 1938, 2034, ...].
# DC writes it as "YYYY, Month": split it into YEAR and MONTH
dc <- separate(dc, col = FIRST.APPEARANCE, into = c("YEAR", "MONTH"), sep = ", ")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 213 rows [210,
## 261, 338, 364, 387, 558, 584, 618, 643, 715, 812, 844, 890, 1115, 1201, 1287,
## 1352, 1355, 1401, 1402, ...].
# Expand Marvel's two-digit years: values above 21 get the prefix "19", all
# others "20"; a missing year stays NA because ifelse() propagates NA tests
marvel <- marvel %>%
  mutate(YEAR = ifelse(as.integer(YEAR) > 21, paste0('19', YEAR), paste0('20', YEAR)))

head(marvel)
##                                    name               ID              ALIGN
## 1             Spider-Man (Peter Parker)  Secret Identity    Good Characters
## 2       Captain America (Steven Rogers)  Public Identity    Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett)  Public Identity Neutral Characters
## 4   Iron Man (Anthony \\"Tony\\" Stark)  Public Identity    Good Characters
## 5                   Thor (Thor Odinson) No Dual Identity    Good Characters
## 6            Benjamin Grimm (Earth-616)  Public Identity    Good Characters
##          EYE       HAIR             SEX             ALIVE APPEARANCES MONTH
## 1 Hazel Eyes Brown Hair Male Characters Living Characters        4043   Aug
## 2  Blue Eyes White Hair Male Characters Living Characters        3360   Mar
## 3  Blue Eyes Black Hair Male Characters Living Characters        3061   Oct
## 4  Blue Eyes Black Hair Male Characters Living Characters        2961   Mar
## 5  Blue Eyes Blond Hair Male Characters Living Characters        2258   Nov
## 6  Blue Eyes    No Hair Male Characters Living Characters        2255   Nov
##   YEAR
## 1 1962
## 2 1941
## 3 1974
## 4 1963
## 5 1950
## 6 1961
# Spell out Marvel's three-letter month abbreviations as full month names.
# "May" needs no entry: recode() leaves values without a match unchanged.
marvel <- marvel %>%
  mutate(MONTH = recode(
    MONTH,
    Jan = "January", Feb = "February", Mar = "March",
    Apr = "April", Jun = "June", Jul = "July",
    Aug = "August", Sep = "September", Oct = "October",
    Nov = "November", Dec = "December"
  ))


head(marvel)
##                                    name               ID              ALIGN
## 1             Spider-Man (Peter Parker)  Secret Identity    Good Characters
## 2       Captain America (Steven Rogers)  Public Identity    Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett)  Public Identity Neutral Characters
## 4   Iron Man (Anthony \\"Tony\\" Stark)  Public Identity    Good Characters
## 5                   Thor (Thor Odinson) No Dual Identity    Good Characters
## 6            Benjamin Grimm (Earth-616)  Public Identity    Good Characters
##          EYE       HAIR             SEX             ALIVE APPEARANCES    MONTH
## 1 Hazel Eyes Brown Hair Male Characters Living Characters        4043   August
## 2  Blue Eyes White Hair Male Characters Living Characters        3360    March
## 3  Blue Eyes Black Hair Male Characters Living Characters        3061  October
## 4  Blue Eyes Black Hair Male Characters Living Characters        2961    March
## 5  Blue Eyes Blond Hair Male Characters Living Characters        2258 November
## 6  Blue Eyes    No Hair Male Characters Living Characters        2255 November
##   YEAR
## 1 1962
## 2 1941
## 3 1974
## 4 1963
## 5 1950
## 6 1961
head(dc)
##                          name              ID           ALIGN        EYE
## 1        Batman (Bruce Wayne) Secret Identity Good Characters  Blue Eyes
## 2       Superman (Clark Kent) Secret Identity Good Characters  Blue Eyes
## 3  Green Lantern (Hal Jordan) Secret Identity Good Characters Brown Eyes
## 4    James Gordon (New Earth) Public Identity Good Characters Brown Eyes
## 5 Richard Grayson (New Earth) Secret Identity Good Characters  Blue Eyes
## 6 Wonder Woman (Diana Prince) Public Identity Good Characters  Blue Eyes
##         HAIR               SEX             ALIVE APPEARANCES YEAR    MONTH
## 1 Black Hair   Male Characters Living Characters        3093 1939      May
## 2 Black Hair   Male Characters Living Characters        2496 1986  October
## 3 Brown Hair   Male Characters Living Characters        1565 1959  October
## 4 White Hair   Male Characters Living Characters        1316 1987 February
## 5 Black Hair   Male Characters Living Characters        1237 1940    April
## 6 Black Hair Female Characters Living Characters        1231 1941 December

This code snippet is part of a project that analyzes the introduction of characters over time in Marvel and DC comics.

The first two lines convert the “YEAR” column from characters to numeric values in both the Marvel and DC data frames.

The next lines sort the Marvel and DC data frames by year. Because order() is called with na.last = NA, rows with a missing year are dropped from the sorted copies rather than placed last.

Finally, the code prints the year of the first character introduced in Marvel and DC comics using the cat function. The output will be a message that shows the year of the first character introduced in Marvel and DC comics respectively.

# 1. Introduction of characters over time

# Coerce the YEAR columns to numeric so they can be ordered and plotted
dc$YEAR <- as.numeric(dc$YEAR)
marvel$YEAR <- as.numeric(marvel$YEAR)

# Order MARVEL rows by year; na.last = NA removes rows with a missing year
marvel_sorted <- marvel[with(marvel, order(YEAR, na.last = NA)), ]

# After sorting, the first row carries the earliest MARVEL year
cat("The first character of MARVEL appeared in the year ", marvel_sorted[["YEAR"]][1], "\n")
## The first character of MARVEL appeared in the year  1939
# Order DC rows by year; na.last = NA removes rows with a missing year
dc_sorted <- dc[with(dc, order(YEAR, na.last = NA)), ]

# After sorting, the first row carries the earliest DC year
cat("The first character of DC appeared in the year ", dc_sorted[["YEAR"]][1], "\n")
## The first character of DC appeared in the year  1935

This code snippet creates a density plot that shows the distribution of appearance of heroes in comic books over time for both Marvel and DC.

The first two lines define a ggplot object and specify the data frame and aesthetics for the plot. Two geom_density layers are added to the plot to create the density curves for Marvel and DC data. The fill parameter inside the aes function specifies the fill color of each density curve.

The alpha parameter controls the transparency of the density curves, with a value of 0.5 indicating that the curves are semi-transparent. The color parameter sets the color of the outline of each density curve.

The labs function call adds a title to the plot. The theme_minimal function call changes the theme of the plot to a minimalist style.

Finally, the scale_fill_manual function call sets the fill color of the density curves to red for Marvel and blue for DC.

# Overlaid densities of the first-appearance year for both publishers.
# Each layer maps `fill` to a constant publisher label, so the manual scale is
# keyed by that label. Unnamed values would be matched to the alphabetically
# ordered levels ("DC", "Marvel"), giving DC the red fill and Marvel the blue
# one, which contradicts the outline colours set on the layers.
ggplot() +
  geom_density(
    data = marvel,
    mapping = aes(x = as.numeric(YEAR), fill = "Marvel"),
    alpha = 0.5, color = "red"
  ) +
  geom_density(
    data = dc,
    mapping = aes(x = as.numeric(YEAR), fill = "DC"),
    alpha = 0.5, color = "blue"
  ) +
  labs(
    title = "Distribution of Appearance of heroes in comic in years",
    x = "Year",
    fill = "Publisher"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c(Marvel = "red", DC = "blue"))
## Warning: Removed 815 rows containing non-finite values (`stat_density()`).
## Warning: Removed 69 rows containing non-finite values (`stat_density()`).

This code snippet creates a grid of two density plots for each gender category (female and male) that show the ratio of characters created over time for both Marvel and DC comics.

The first eight lines of code filter the Marvel and DC data frames by gender category, creating four new data frames per publisher: female, male, genderfluid, and agender characters for Marvel, and female, male, genderless, and transgender characters for DC.

The gender_density_plot function takes four parameters: data, which specifies the data frame to use for the plot; var, which specifies the variable to use for the x-axis; label, which specifies the title of the plot; and color, which specifies the fill color of the density curve. This function creates a density plot for the specified gender category.

The options function call sets the size of the plot. The plot_grid function arranges two plots side by side, one per gender category (female and male). Each plot is built from the Marvel data with the gender_density_plot function, and an additional geom_density layer overlays the corresponding DC data on the same axes. The ncol and nrow parameters set the number of columns and rows for the grid layout.

# Split each publisher's table by the SEX category used in the plots below.
# Name suffixes: ag = agender, tg = transgender; gf means genderfluid for
# Marvel but genderless for DC.
dc_female_characters <- filter(dc, SEX == "Female Characters")
dc_male_characters <- filter(dc, SEX == "Male Characters")
dc_gf_characters <- filter(dc, SEX == "Genderless Characters")
dc_tg_characters <- filter(dc, SEX == "Transgender Characters")

marvel_female_characters <- filter(marvel, SEX == "Female Characters")
marvel_male_characters <- filter(marvel, SEX == "Male Characters")
marvel_gf_characters <- filter(marvel, SEX == "Genderfluid Characters")
marvel_ag_characters <- filter(marvel, SEX == "Agender Characters")


# Density plot of one column of a single data frame.
#   data  : data frame to plot
#   var   : name (string) of the column mapped to the x-axis
#   label : plot title
#   color : fill colour of the density curve
# Returns the ggplot object, so further layers can be added with `+`.
gender_density_plot <- function(data, var, label, color) {
  # .data[[var]] looks the column up by its string name; it replaces
  # aes_string(), which is deprecated since ggplot2 3.0.0
  ggplot(data = data, aes(x = .data[[var]])) +
    geom_density(fill = color, alpha = 0.5) +
    labs(title = label, x = "Year") +
    theme_minimal() +
    theme(plot.title = element_text(size = rel(0.8))) # Decrease the font size of the title
}

options(repr.plot.width = 30, repr.plot.height = 8) # Increase the plot size

# One panel per gender category. Each panel draws the Marvel density in red
# and overlays the DC density in blue. These panels have no legend, so the
# titles name both publishers and their colours instead of Marvel alone.
plot_grid(
  gender_density_plot(
    marvel_female_characters, 'YEAR',
    'Ratio of Female characters created over the years - Marvel (red) vs DC (blue)',
    'red'
  ) +
    geom_density(data = dc_female_characters, aes(x = as.numeric(YEAR)), fill = "blue", alpha = 0.5),
  gender_density_plot(
    marvel_male_characters, 'YEAR',
    'Ratio of Male characters created over the years - Marvel (red) vs DC (blue)',
    'red'
  ) +
    geom_density(data = dc_male_characters, aes(x = as.numeric(YEAR)), fill = "blue", alpha = 0.5),
  ncol = 2,
  nrow = 1
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`
## Warning: Removed 209 rows containing non-finite values (`stat_density()`).
## Warning: Removed 20 rows containing non-finite values (`stat_density()`).
## Warning: Removed 538 rows containing non-finite values (`stat_density()`).
## Warning: Removed 48 rows containing non-finite values (`stat_density()`).

This code snippet continues the analysis of the introduction of characters over time for Marvel and DC comics by creating four density plots for each gender category (female, male, agender/genderless, and genderfluid/transgender) that show the ratio of characters created over time for both publishers.

The first eight lines of code add a new column called “Publisher” to each of the filtered data frames for each gender category, and assign the corresponding publisher to each data frame.

The gender_density_plot function now takes two data frame parameters, data1 and data2, for the two publishers being compared. The rbind function is used to combine the two data frames into a single data frame for plotting.

The scale_fill_manual function call sets the fill colors of the density curves for each publisher to red for Marvel and blue for DC. The name parameter sets the title of the legend to “Publisher”, and the labels parameter sets the label names for each fill color.

The options function call sets the size of the plot. Four calls to the gender_density_plot function are made, one per gender category, and each call overlays the Marvel and DC densities in a single plot. The output is four separate plots that show the ratio of characters created over time for each gender category, comparing Marvel and DC.

# Tag every per-gender subset with its publisher so that two subsets can be
# stacked and told apart by the Publisher column
marvel_female_characters[["Publisher"]] <- "Marvel"
marvel_male_characters[["Publisher"]] <- "Marvel"
marvel_ag_characters[["Publisher"]] <- "Marvel"
marvel_gf_characters[["Publisher"]] <- "Marvel"

dc_female_characters[["Publisher"]] <- "DC"
dc_male_characters[["Publisher"]] <- "DC"
dc_gf_characters[["Publisher"]] <- "DC"
dc_tg_characters[["Publisher"]] <- "DC"

# Overlaid density plot comparing two publishers for one gender category.
#   data1, data2   : data frames with identical columns, each carrying a
#                    Publisher column ("Marvel" or "DC")
#   var            : name (string) of the column mapped to the x-axis
#   label          : plot title
#   color1, color2 : fill colours for Marvel and DC respectively
# Returns the ggplot object.
gender_density_plot <- function(data1, data2, var, label, color1, color2) {
  combined_data <- rbind(data1, data2)
  # .data[[var]] looks the column up by its string name; it replaces
  # aes_string(), which is deprecated since ggplot2 3.0.0
  ggplot(data = combined_data, aes(x = .data[[var]], fill = Publisher)) +
    geom_density(alpha = 0.5) +
    labs(title = label, x = "Year") +
    # Colours are keyed by publisher name. Explicit breaks fix the legend
    # order and let the labels default to the break values. A positional
    # labels = c("Marvel", "DC") would be applied to the alphabetically
    # ordered breaks ("DC", "Marvel") and swap the legend text.
    scale_fill_manual(
      values = c(Marvel = color1, DC = color2),
      breaks = c("Marvel", "DC"),
      name = "Publisher"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = rel(0.8)))
}

options(repr.plot.width = 12, repr.plot.height = 8)

# Female characters: Marvel vs DC
female_plot <- gender_density_plot(
  data1 = marvel_female_characters,
  data2 = dc_female_characters,
  var = 'YEAR',
  label = 'Ratio of Female characters created over the years',
  color1 = 'red',
  color2 = 'blue'
)
print(female_plot)
## Warning: Removed 229 rows containing non-finite values (`stat_density()`).
# Male characters: Marvel vs DC
male_plot <- gender_density_plot(
  data1 = marvel_male_characters,
  data2 = dc_male_characters,
  var = 'YEAR',
  label = 'Ratio of Male characters created over the years',
  color1 = 'red',
  color2 = 'blue'
)
print(male_plot)
## Warning: Removed 586 rows containing non-finite values (`stat_density()`).
# Marvel agender characters vs DC genderless characters (dc_gf_characters)
ag_plot <- gender_density_plot(
  data1 = marvel_ag_characters,
  data2 = dc_gf_characters,
  var = 'YEAR',
  label = 'Ratio of Agender/Genderless characters created over the years',
  color1 = 'red',
  color2 = 'blue'
)
print(ag_plot)
## Warning: Removed 9 rows containing non-finite values (`stat_density()`).
# Marvel genderfluid characters vs DC transgender characters
gf_plot <- gender_density_plot(
  data1 = marvel_gf_characters,
  data2 = dc_tg_characters,
  var = 'YEAR',
  label = 'Ratio of Genderfluid/Transgender characters created over the years',
  color1 = 'red',
  color2 = 'blue'
)
print(gf_plot)
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

# Earliest first-appearance year per gender subset (missing years ignored)
# DC female characters
min(dc_female_characters[["YEAR"]], na.rm = TRUE)
## [1] 1936
# Marvel female characters
min(marvel_female_characters[["YEAR"]], na.rm = TRUE)
## [1] 1939
# Marvel genderfluid characters
min(marvel_gf_characters[["YEAR"]], na.rm = TRUE)
## [1] 1949
# DC genderless characters
min(dc_gf_characters[["YEAR"]], na.rm = TRUE)
## [1] 1961
# Marvel agender characters
min(marvel_ag_characters[["YEAR"]], na.rm = TRUE)
## [1] 1964
# DC transgender characters
min(dc_tg_characters[["YEAR"]], na.rm = TRUE)
## [1] 2009

This code snippet creates two bar plots that show the number of first appearances of characters in Marvel and DC comics by year.

The first four lines of code use the %>% pipe operator to filter out missing values from the “YEAR” column in the Marvel data frame using the na.omit function, and count the number of characters that appeared in each year using the count function. This creates a new data frame that can be plotted.

The ggplot function is used to create a new plot object, and aes is used to specify the aesthetics for the plot. The geom_bar function is used to create a bar plot where the height of each bar represents the number of characters that appeared in each year.

The labs function call adds a title to the plot. The theme_minimal function call changes the theme of the plot to a minimalist style. The theme function is used to adjust the x-axis text angle to 90 degrees, making it easier to read the year labels.

The next four lines of code follow a similar pattern as the first four lines, but for the DC data frame instead. The output will be two separate bar plots that show the number of first appearances of characters in Marvel and DC comics by year.

# First Appearances by Year
# Only rows with a missing YEAR are excluded. na.omit() would also discard
# every character that has an NA in any other column (for example
# APPEARANCES), which undercounts the yearly totals.
marvel %>%
  filter(!is.na(YEAR)) %>%
  count(YEAR) %>%
  ggplot(aes(x = as.factor(YEAR), y = n)) +
  geom_col(fill = "red") + # bar height = number of characters in that year
  labs(title = "Marvel First Appearances by Year", x = "Year", y = "Characters") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

dc %>%
  filter(!is.na(YEAR)) %>%
  count(YEAR) %>%
  ggplot(aes(x = as.factor(YEAR), y = n)) +
  geom_col(fill = "blue") + # bar height = number of characters in that year
  labs(title = "DC First Appearances by Year", x = "Year", y = "Characters") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

This code snippet analyzes the gender diversity of characters in Marvel and DC comics by creating two pie charts.

The first two lines of code use the %>% pipe operator to count the number of characters for each gender category in the Marvel and DC data frames, and filter out empty values using the filter function. This creates two new data frames that can be plotted.

The plot_ly function from the plotly package is used to create a new plot object. The labels parameter is used to specify the labels for the slices of the pie chart, and the values parameter is used to specify the values for each slice. The type parameter is set to “pie” to create a pie chart, and the name parameter sets the name of the plot.

The layout function call adds a title to the plot. The fig object is printed to display the pie chart. The code creates two pie charts, one for Marvel and one for DC, that show the gender diversity of characters.

# 2. Gender proportionality
# Count characters per SEX value for each publisher, then drop the blank category
sex_dc <- dc %>%
  count(SEX) %>%
  filter(SEX != "")
sex_m <- marvel %>%
  count(SEX) %>%
  filter(SEX != "")

# Marvel pie chart: one slice per SEX value, sized by its count
fig <- plot_ly(data = sex_m, labels = ~SEX, values = ~n, type = "pie", name = "Marvel")
fig <- fig %>% layout(title = "Gender diversity in Marvel")

fig
# DC pie chart, built the same way (reuses the `fig` variable)
fig <- plot_ly(data = sex_dc, labels = ~SEX, values = ~n, type = "pie", name = "DC")
fig <- fig %>% layout(title = "Gender diversity in DC")

fig

This code snippet creates six bar plots that show the count of characters in each gender category (Male Characters, Female Characters, Genderfluid Characters, Agender Characters, and Transgender Characters) for different attributes (Alignment, Identity, and Living Status) in Marvel and DC comics.

The first two lines of code remove rows with empty values in the “SEX” column of the Marvel and DC data frames using the filter function.

The plot_count function is defined to create a bar plot that shows the count of characters for each gender category in a given data frame and attribute. The function takes five arguments: data (the data frame to plot), x_var (the attribute to plot on the x-axis), hue_var (the variable to group by and fill the bars with), title (the title of the plot), and palette (the color palette to use for the bars). Inside the function, scale_x_discrete is used to customize the x-axis labels.

The ggplot function is used to create a new plot object, and aes_string is used to specify the aesthetics for the plot. The geom_bar function is used to create a bar plot where the height of each bar represents the count of characters for each gender category. The labs function call adds a title and axis labels to the plot. The theme function is used to adjust the appearance of the plot. The scale_fill_manual function is used to customize the colors of the bars.

The last six lines of code call the plot_count function with different arguments to create six separate bar plots that show the count of characters in each gender category for different attributes in Marvel and DC comics. The print function is used to display the plots.

# Keep only DC rows whose SEX value is not the empty string
dc <- filter(dc, SEX != "")

# Keep only Marvel rows whose SEX value is not the empty string
marvel <- filter(marvel, SEX != "")

# Dodged bar chart of row counts per category of `x_var`, split by `hue_var`.
#   data    : data frame to plot
#   x_var   : name (string) of the column shown on the x-axis
#   hue_var : name (string) of the column used for the bar fill
#   title   : plot title
#   palette : vector of fill colours passed to scale_fill_manual()
# Returns the ggplot object.
plot_count <- function(data, x_var, hue_var, title, palette) {
  # Derive tick labels from the actual category values rather than a fixed
  # positional vector. A fixed c("Good", "Bad", "Neutral", "Unknown") is
  # applied in axis order whatever the column is, so it mislabels ID and
  # ALIVE and does not follow the sorted order of the ALIGN categories.
  tidy_labels <- function(values) {
    ifelse(
      is.na(values) | values == "",
      "Unknown",
      sub(" Characters$", "", values) # "Good Characters" -> "Good"
    )
  }

  # .data[[name]] looks a column up by its string name; it replaces
  # aes_string(), which is deprecated since ggplot2 3.0.0
  ggplot(data = data, aes(x = .data[[x_var]], fill = .data[[hue_var]])) +
    geom_bar(position = "dodge") +
    labs(title = title, x = x_var, y = "Count", fill = hue_var) +
    theme_bw() +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 12, face = "bold"),
      axis.text = element_text(size = 10),
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major = element_line(color = "grey", linetype = "dashed"),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    ) +
    scale_fill_manual(values = palette) +
    scale_x_discrete(labels = tidy_labels)
}

# Alignment by sex, Marvel
plot1 <- plot_count(
  data = marvel, x_var = "ALIGN", hue_var = "SEX",
  title = "Marvel Sex vs Align",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot1)

# Alignment by sex, DC
plot2 <- plot_count(
  data = dc, x_var = "ALIGN", hue_var = "SEX",
  title = "DC Sex vs Align",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot2)

# Identity type by sex, Marvel
plot3 <- plot_count(
  data = marvel, x_var = "ID", hue_var = "SEX",
  title = "Marvel Sex vs Identity",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot3)

# Identity type by sex, DC
plot4 <- plot_count(
  data = dc, x_var = "ID", hue_var = "SEX",
  title = "DC Sex vs Identity",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot4)

# Living status by sex, Marvel
plot5 <- plot_count(
  data = marvel, x_var = "ALIVE", hue_var = "SEX",
  title = "Marvel Sex vs Living status",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot5)

# Living status by sex, DC
plot6 <- plot_count(
  data = dc, x_var = "ALIVE", hue_var = "SEX",
  title = "DC Sex vs Living status",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot6)

This code defines a function called top_10_pie_plotly that creates pie charts using the plotly library. It takes as inputs a data frame (df), a column containing the labels for the pie chart (labels), a column containing the values for the pie chart (values), and a title for the chart (title).

The function is then used to create three pie charts: one for the top 10 Marvel characters by appearances, one for the top 10 DC characters by appearances, and one for the top 10 characters overall (combining both Marvel and DC). The code selects the top 10 characters for each data frame using arrange and head functions.

# Build a plotly pie chart.
#   df     : data frame handed to plot_ly()
#   labels : slice labels
#   values : slice sizes
#   title  : chart title
# Returns the plotly object with the title applied.
top_10_pie_plotly <- function(df, labels, values, title) {
  layout(
    plot_ly(df, labels = labels, values = values, type = "pie"),
    title = title
  )
}

# Ten Marvel characters with the highest APPEARANCES count
top_10_appearances_m <- head(arrange(marvel, desc(APPEARANCES)), n = 10)
top_10_pie_plotly(
  df = top_10_appearances_m,
  labels = top_10_appearances_m$name,
  values = top_10_appearances_m$APPEARANCES,
  title = "Top 10 Marvel Characters by Appearances"
)
# Ten DC characters with the highest APPEARANCES count
top_10_appearances_dc <- head(arrange(dc, desc(APPEARANCES)), n = 10)
top_10_pie_plotly(
  df = top_10_appearances_dc,
  labels = top_10_appearances_dc$name,
  values = top_10_appearances_dc$APPEARANCES,
  title = "Top 10 DC Characters by Appearances"
)
# Stack both publishers into one table (DC rows first)
dc_marvel <- rbind(dc, marvel)

# Ten characters with the highest APPEARANCES count across both publishers
top_10_appearances_dc_marvel <- head(arrange(dc_marvel, desc(APPEARANCES)), n = 10)
top_10_pie_plotly(
  df = top_10_appearances_dc_marvel,
  labels = top_10_appearances_dc_marvel$name,
  values = top_10_appearances_dc_marvel$APPEARANCES,
  title = "Top 10 DC and Marvel Characters by Appearances"
)